# ==============================================================================
# file name: SI-sample-statistics.R
# authors: Bernhard Clemm 
# date: January 13, 2022
# purpose: table for sample stats
# ==============================================================================

# SETUP ========================================================================

# Project root: the parent of the directory that contains this script. The
# script path is taken from the RStudio source editor, so this only works when
# the file is run from within RStudio.
basedir <- paste0(dirname(dirname(
  rstudioapi::getSourceEditorContext()$path)), "/")
codedir <- paste0(basedir, "code/")
datadir <- paste0(basedir, "data/")

# Output directory for the LaTeX tables written further down. `tabdir` is used
# by the save_kable() calls but was never defined in this script, so a fresh
# session failed with "object 'tabdir' not found". A `tabdir` that already
# exists in the session is kept as is.
# NOTE(review): "tables/" is an assumed location -- confirm against the
# project layout.
if (!exists("tabdir")) {
  tabdir <- paste0(basedir, "tables/")
}
# Make sure the directory is there before anything is written to it.
dir.create(tabdir, showWarnings = FALSE, recursive = TRUE)

library(tidyverse)
library(xlsx)
library(kableExtra)

# Respondent-level data; the country-specific subsets below are taken from it.
data_wide <- read.csv(paste0(datadir, "processed/data_wide.csv"))

# Demographics =================================================================

## US ####

# Population data
# Read from sheet 5 of the workbook; `proportion` is rescaled to percent so it
# is on the same scale as the sample shares computed below.
pop_us <- read.xlsx(
  paste0(datadir, "population/US_census_acs.xlsx"), sheetIndex = 5) %>%
  mutate(percent = proportion * 100) %>%
  select(-proportion)

# Subset respondents
# Keep US rows that have a non-missing `ResponseId_w3`.
data_wide_us_w3 <- data_wide %>%
  filter(country == "US" & !is.na(ResponseId_w3))

# Sample shares (in percent) per category of gender, age and education,
# stacked into one long table. table() drops NA by default, so the shares are
# computed among non-missing answers only.
sample_shares_us <- rbind(
  data.frame(prop.table(table(data_wide_us_w3$gender_cat)) * 100),
  data.frame(prop.table(table(data_wide_us_w3$age_cat)) * 100),
  data.frame(prop.table(table(data_wide_us_w3$edu_cat)) * 100)) %>%
  rename(value = Var1, `Pre-survey` = Freq)

# Population vs. sample shares, one row per category found in the sample.
sample_stats_us <- pop_us %>%
  rename(Population = percent) %>%
  # The join key is spelled out instead of relying on a natural join.
  # NOTE(review): assumes `value` is the only column shared with the
  # population sheet (the sample table has just `value` and `Pre-survey`).
  # Because the sample table carries no variable identifier, category labels
  # must be unique across gender/age/education for the match to be correct.
  right_join(sample_shares_us, by = "value") %>%
  # Lambda form: passing extra arguments through across(...) is deprecated
  # since dplyr 1.1.0.
  mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
  # Human-readable labels for the first table column; any other value of
  # `variable` (including NA from unmatched sample categories) becomes NA.
  mutate(variable = case_when(
    variable == "gender_cat" ~ "Gender",
    variable == "age_cat" ~ "Age",
    variable == "edu_cat" ~ "Education"))

# Write the LaTeX table. `tabdir` must be defined before this point.
kable(sample_stats_us,
      caption = "Demographics of population/sample (US)",
      format = "latex", booktabs = TRUE, escape = FALSE, linesep = "",
      row.names = FALSE,
      col.names = c("", "", "Population", "Pre-survey")) %>%
  collapse_rows(columns = 1, valign = "middle", latex_hline = "major") %>%
  save_kable(file = paste0(tabdir, "sample_stats_us.tex"))

## PL ####

# Population data
# Read from sheet 1 of the workbook; `proportion` is rescaled to percent so it
# is on the same scale as the sample shares computed below.
pop_pl <- read.xlsx(
  paste0(datadir, "population/PL_panelariadna.xlsx"), sheetIndex = 1) %>%
  mutate(percent = proportion * 100) %>%
  select(-proportion)

# Subset respondents
# Keep PL rows that have a non-missing `respondent_id_w2`.
data_wide_pl_w2 <- data_wide %>%
  filter(country == "PL" & !is.na(respondent_id_w2))

# Sample shares (in percent) per category of gender, age and education,
# stacked into one long table. table() drops NA by default, so the shares are
# computed among non-missing answers only.
sample_shares_pl <- rbind(
  data.frame(prop.table(table(data_wide_pl_w2$gender_cat)) * 100),
  data.frame(prop.table(table(data_wide_pl_w2$age_cat)) * 100),
  data.frame(prop.table(table(data_wide_pl_w2$edu_cat)) * 100)) %>%
  rename(value = Var1, `Pre-survey` = Freq)

# Population vs. sample shares, one row per category found in the sample.
sample_stats_pl <- pop_pl %>%
  rename(Population = percent) %>%
  # The join key is spelled out instead of relying on a natural join.
  # NOTE(review): assumes `value` is the only column shared with the
  # population sheet (the sample table has just `value` and `Pre-survey`).
  # Because the sample table carries no variable identifier, category labels
  # must be unique across gender/age/education for the match to be correct.
  right_join(sample_shares_pl, by = "value") %>%
  # Lambda form: passing extra arguments through across(...) is deprecated
  # since dplyr 1.1.0.
  mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
  # Human-readable labels for the first table column; any other value of
  # `variable` (including NA from unmatched sample categories) becomes NA.
  mutate(variable = case_when(
    variable == "gender_cat" ~ "Gender",
    variable == "age_cat" ~ "Age",
    variable == "edu_cat" ~ "Education"))

# Write the LaTeX table. `tabdir` must be defined before this point.
kable(sample_stats_pl,
      caption = "Demographics of population/sample (PL)",
      format = "latex", booktabs = TRUE, escape = FALSE, linesep = "",
      row.names = FALSE,
      col.names = c("", "", "Population", "Pre-survey")) %>%
  collapse_rows(columns = 1, valign = "middle", latex_hline = "major") %>%
  save_kable(file = paste0(tabdir, "sample_stats_pl.tex"))

